• BioProject Accession: PRJNA632846
  • GEO Accession: GSE150570



Load required packages.

library(tidyverse)
library(magrittr)
library(Matrix)
library(patchwork)
library(extrafont)
# library(tidylog)
Sys.time()
## [1] "2021-02-03 20:59:43 CST"

Data preparation

Functions loading

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

load_matrix <- function(x) {
    matrix_readcount_use <- scipy.sparse$load_npz(
        file.path(x, "matrix_readcount.npz")
    )

    colnames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_barcodes.npy")
    )

    rownames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_features.npy")
    )

    return(matrix_readcount_use)
}

Data loading

PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Human_blastoid"

Matrix

np <- reticulate::import("numpy", convert = TRUE)
scipy.sparse <- reticulate::import(module = "scipy.sparse", convert = TRUE)
matrix_dir <- list(
    "github/data/matrices/LW36",
    "github/data/matrices/LW49_LW50_LW51_LW52",
    "github/data/matrices/LW58_LW59",
    "github/data/matrices/LW60_LW61",
    "raw/public/PRJEB11202/reformatted_matrix"
)

matrix_readcount_use <- purrr::map(matrix_dir, function(x) {
    load_matrix(file.path(PROJECT_DIR, x))
}) %>%
    purrr::reduce(cbind)

walk(list(matrix_readcount_use, matrix_dir), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})
## 1.6 GB
## 808 B
# clean up
rm(matrix_dir)

Embedding

EMBEDDING_FILE <- "embedding.csv.gz"

embedding <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "github",
        "data",
        EMBEDDING_FILE
    )
)

# clean up
rm(EMBEDDING_FILE)

Metadata

cell_metadata_PRJEB11202 <- read_delim(
    file.path(
        PROJECT_DIR,
        "raw/public/PRJEB11202/",
        "E-MTAB-3929.sdrf.tsv"
    ),
    delim = "\t"
) %>%
    dplyr::select(
        `Comment[ENA_SAMPLE]`,
        `Comment[ENA_RUN]`,
        `Characteristics[developmental stage]`,
        `Characteristics[inferred lineage]`
    ) %>%
    dplyr::rename(
        cell = `Comment[ENA_SAMPLE]`,
        run = `Comment[ENA_RUN]`,
        developmental_stage = `Characteristics[developmental stage]`,
        lineage = `Characteristics[inferred lineage]`
    ) %>%
    mutate(
        developmental_stage = str_replace(
            string = developmental_stage,
            pattern = "embryonic day ",
            replacement = "E"
        ),
        lineage = case_when(
            lineage == "epiblast" ~ "EPI",
            lineage == "primitive endoderm" ~ "HYP",
            lineage == "trophectoderm" ~ "TE",
            lineage == "not applicable" ~ "Pre-lineage"
        )
    )

embedding %<>%
    left_join(
        cell_metadata_PRJEB11202
    ) %>%
    mutate(
        lineage = case_when(
            is.na(lineage) ~ "Blastoid",
            TRUE ~ as.character(lineage)
        ),
        developmental_stage = case_when(
            is.na(developmental_stage) ~ "Blastoid",
            TRUE ~ as.character(developmental_stage)
        )
    )
matrix_readcount_use <- matrix_readcount_use[, embedding$cell]
matrix_cpm_use <- calc_cpm(matrix_readcount_use)

walk(list(matrix_readcount_use, matrix_cpm_use), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})
## 496.5 MB
## 496.5 MB
# clean up
rm(cell_metadata_PRJEB11202)
embedding %>%
    mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_genes = colSums(matrix_readcount_use[, cell] > 0)
    ) %>%
    group_by(louvain) %>%
    summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes)
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
louvain num_cells median_umis median_genes
0 1533 11534.0 3074.0
1 1113 18092.0 3612.0
2 1079 11308.0 2940.0
3 996 14659.0 3220.5
4 924 1108.5 627.0
5 725 6915.0 1981.0
6 710 9711.5 2158.0
7 672 10544.0 2791.5
8 649 11961.0 2905.0
9 570 19201.0 3656.0
10 410 23705.5 4208.5
11 366 13910.5 3355.5
12 365 16942.0 3437.0
13 275 8010.0 2455.0
14 249 31950.0 4954.0
15 202 8217.0 2452.0
16 138 10242.5 2852.5
17 120 17426.0 3400.0
18 86 18158.0 3853.5


embedding %>%
    mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_genes = colSums(matrix_readcount_use[, cell] > 0)
    ) %>%
    group_by(batch) %>%
    summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes)
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
batch num_cells median_umis median_genes
LW60 4497 14421 3337
LW61 5156 7625 2185
PRJEB11202 1529 1551093 10305

Blastoids globally resemble blastocysts


Embedding visualization

EMBEDDING_TITLE_PREFIX <- "UMAP"

x_column <- "x_umap"
y_column <- "y_umap"


Clustering & batch & UMI

p_embedding_batch <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.3,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

CB_POSITION <- c(0.875, 0.3)
p_embedding_umi <- plot_embedding_value(
    embedding = embedding[, c(x_column, y_column)],
    color_values = log10(
        Matrix::colSums(matrix_readcount_use[, embedding$cell]) + 1
    ),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.6,
    sort_values = TRUE,
    FUN = function(x) x
)

selected_feature <- "ENSG00000204531_POU5F1"
p_embedding_POU5F1 <- plot_embedding_value(
    embedding = embedding[, c(x_column, y_column)],
    color_values = log10(matrix_cpm_use[selected_feature, embedding$cell] + 1),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; ", selected_feature),
    label_position = NULL,
    geom_point_size = 0.6,
    sort_values = TRUE,
    FUN = function(x) x
)
purrr::reduce(list(
    p_embedding_batch,
    p_embedding_umi,
    p_embedding_POU5F1
), `+`) +
    patchwork::plot_layout(ncol = 3) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Developmental stages

purrr::map(embedding %>%
    tidyr::drop_na() %>%
    pull(developmental_stage) %>%
    unique() %>% sort(), function(x) {
    plot_embedding(
        embedding = embedding[, c(x_column, y_column)],
        color_values = as.numeric(embedding$developmental_stage == x) %>% as.factor(),
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        label_position = NULL,
        show_color_value_labels = FALSE,
        show_color_legend = FALSE,
        geom_point_size = 0.5,
        sort_values = TRUE
    ) +
        scale_color_manual(
            values = c("grey70", "salmon")
        )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 3) +
    plot_annotation(
        theme = theme(plot.margin = margin())
    )

Polishing

color_palette_cluster <- c(
    "0" = "#8DD3C7",
    "1" = "#9EDAE5FF",
    "2" = "#BEBADA",
    "3" = "#FB8072",
    "4" = "#80B1D3",
    "5" = "#FDB462",
    "6" = "#B3DE69",
    "7" = "#FCCDE5",
    "8" = "#DC863B",
    "9" = "#BC80BD",
    "10" = "#11c638",
    "11" = "#BCBD22FF",
    "12" = "#17BECFFF",
    "13" = "#AEC7E8FF",
    "14" = "#EAD3C6",
    "15" = "#98DF8AFF",
    "16" = "#FF9896FF",
    "17" = "#C5B0D5FF",
    "18" = "#C49C94FF",
    "19" = "#F7B6D2FF",
    "20" = "#D33F6A",
    "21" = "#8E063B",
    "22" = "#023FA5"
)

cluster_labels <- embedding %>%
    dplyr::group_by(.data[["louvain"]]) %>%
    dplyr::summarise(
        x = quantile(.data[[x_column]], 0.5),
        y = quantile(.data[[y_column]], 0.5),
        .groups = "drop"
    ) %>%
    as.data.frame()
cluster_labels[cluster_labels[["louvain"]] == 14, c("x", "y")] <- c(6.8, -1.8)

clusters_unidentified <- c(2, 3, 4, 5, 6, 7, 13, 15, 16)
cluster_labels %<>%
    mutate(
        label = case_when(
            louvain %in% clusters_unidentified ~ paste0("U", as.character(louvain)),
            TRUE ~ as.character(louvain)
        )
    )

# cluster
p_embedding_cluster <- embedding %>%
    arrange(desc(louvain)) %>%
    ggplot(
        aes(
            x = .data[[x_column]],
            y = .data[[y_column]],
            color = .data[["louvain"]] %>% as.factor()
        )
    ) +
    geom_point(
        size = 0.45,
        alpha = 1,
        stroke = 0,
        shape = 16
    ) +
    scale_color_manual(
        values = color_palette_cluster
    ) +
    theme_void() +
    ggplot2::guides(color = "none") +
    ggplot2::annotate(
        "text",
        family = "Arial",
        x = cluster_labels[, "x"],
        y = cluster_labels[, "y"],
        label = cluster_labels[, "label"],
        parse = TRUE,
        size = 2,
        color = c("black")
    )

# lineage
embedding_lineage_plotting <- embedding %>%
    mutate(
        lineage = factor(lineage,
            levels = c("Blastoid", "TE", "HYP", "EPI", "Pre-lineage")
        )
    ) %>%
    arrange(lineage)

p_embedding_lineage <- plot_embedding(
    embedding = embedding_lineage_plotting[, c(x_column, y_column)],
    color_values = embedding_lineage_plotting$lineage %>% as.factor(),
    label = NULL,
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.4,
    geom_point_alpha = 1,
    sort_values = TRUE
) +
    scale_color_manual(
        name = NULL,
        values = c("grey70", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
        breaks = c("Blastoid", "EPI", "HYP", "TE", "Pre-lineage"),
        labels = c(
            "Blastoid cells", "Epiblast (EPI)", "Hypoblast (HYP)", 
            "Trophectoderm (TE)", "Pre-lineage")
    ) +
    customized_theme(legend_key_size = 1, legend_text_size = 5) +
    guides(colour = guide_legend(override.aes = list(size = 2)))

# clean up
rm(embedding_lineage_plotting)

p_embedding_developmental_stage <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$developmental_stage %>% as.factor(),
    label = NULL,
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.45,
    geom_point_alpha = 1,
    sort_values = TRUE
) +
    scale_color_manual(
        name = NULL,
        values = c(
            "grey70",
            ggthemes::tableau_color_pal("Tableau 10")(
                length(unique(embedding$developmental_stage))
            )
        ),
        labels = c("Blastoid cells", "E3", "E4", "E5", "E6", "E7")
    ) +
    customized_theme(legend_key_size = 1, legend_text_size = 5) +
    guides(colour = guide_legend(override.aes = list(size = 2)))
list(
    p_embedding_cluster,
    p_embedding_lineage,
    p_embedding_developmental_stage
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 1) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )
CB_POSITION <- c(0.875, 0.32)
x_label <- ggplot_build(p_embedding_POU5F1)$layout$panel_params[[1]][c("x.range")] %>%
    unlist() %>%
    quantile(0.575)
y_label <- ggplot_build(p_embedding_POU5F1)$layout$panel_params[[1]][c("y.range")] %>%
    unlist() %>%
    quantile(0.8)

features_selected <- c(
    "ENSG00000181449_SOX2",
    "ENSG00000187498_COL4A1",
    "ENSG00000179348_GATA2"
)

lineage_ <- c(
    "(Epiblast)",
    "(Hypoblast)",
    "(Trophectoderm)"
)

p_embedding_SOX2_COL4A1_GATA2 <- purrr::map2(
    features_selected,
    lineage_, function(x, y) {
        values <- log10(matrix_cpm_use[x, embedding$cell] + 1)
        values[embedding$batch == "PRJEB11202"] <- NA

        idx <- order(values, decreasing = FALSE, na.last = FALSE)

        plot_embedding_value(
            embedding = embedding[idx, c(x_column, y_column)],
            color_values = values[idx],
            colorbar_position = CB_POSITION,
            label = str_c(x %>% str_remove("E.+_"), y, sep = "\n"),
            label_position = c(x_label, y_label),
            geom_point_size = 0.5,
            sort_values = FALSE,
            FUN = function(x) x,
            label_size = 1.8
        )
    }
)

c(
    list(
        p_embedding_cluster,
        p_embedding_lineage,
        p_embedding_developmental_stage
    ),
    p_embedding_SOX2_COL4A1_GATA2
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2, byrow = FALSE) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )


Expression

Embedding

features_selected <- c(
    "ENSG00000204531_POU5F1",
    "ENSG00000203909_DPPA5",
    "ENSG00000184344_GDF3",
    "ENSG00000156574_NODAL",
    "ENSG00000111704_NANOG",
    "ENSG00000075388_FGF4",
    #
    "ENSG00000164736_SOX17",
    "ENSG00000125798_FOXA2",
    "ENSG00000136574_GATA4",
    "ENSG00000141448_GATA6",
    "ENSG00000134853_PDGFRA",
    "ENSG00000115414_FN1",
    #
    "ENSG00000107485_GATA3",
    "ENSG00000118777_ABCG2",
    "ENSG00000165556_CDX2",
    "ENSG00000137869_CYP19A1",
    "ENSG00000172818_OVOL1",
    "ENSG00000126353_CCR7"
)

purrr::map(features_selected, function(x) {
    values <- log10(matrix_cpm_use[x, embedding$cell] + 1)
    values[embedding$batch == "PRJEB11202"] <- NA

    idx <- order(values, decreasing = FALSE, na.last = FALSE)

    plot_embedding_value(
        embedding = embedding[idx, c(x_column, y_column)],
        color_values = values[idx],
        colorbar_position = CB_POSITION,
        label = str_c(x %>% str_remove("E.+_"), sep = "\n"),
        label_position = c(x_label, y_label),
        geom_point_size = 0.5,
        sort_values = FALSE,
        FUN = function(x) x,
        label_size = 1.8
    )
}) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 3, byrow = FALSE) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Lollipop

clusters_selected_lollipop <- c(
    11,
    18,
    0, 1, 8, 9, 12, 17
)

cells_selected_lollipop <- purrr::map(clusters_selected_lollipop, function(x) {
    embedding %>%
        filter(
            louvain == x,
            batch != "PRJEB11202"
        ) %>%
        pull(cell)
})
names(cells_selected_lollipop) <- clusters_selected_lollipop

d <- c(
    "ENSG00000181449_SOX2",
    "ENSG00000156574_NODAL",
    "ENSG00000111704_NANOG",
    "ENSG00000147596_PRDM14",
    "ENSG00000075388_FGF4",
    #
    "ENSG00000164736_SOX17",
    "ENSG00000125798_FOXA2",
    "ENSG00000136574_GATA4",
    "ENSG00000141448_GATA6",
    "ENSG00000115414_FN1",
    "ENSG00000187498_COL4A1",
    #
    "ENSG00000179348_GATA2",
    "ENSG00000107485_GATA3",
    "ENSG00000118777_ABCG2",
    #
    "ENSG00000126353_CCR7",
    "ENSG00000137869_CYP19A1",
    "ENSG00000169550_MUC15",
    "ENSG00000172818_OVOL1"
)

plot_lollipop(
    cells = cells_selected_lollipop,
    features = d,
    matrix_cpm = matrix_cpm_use,
    size_range_limits = c(0, 4),
    dot_size = 3
) +
    ggplot2::theme(
        legend.box = "horizontal",
        axis.text.x.top = element_text(angle = 90, vjust = 0.5, hjust = 0)
    )

Violin

features_selected <- c(
    "ENSG00000204531_POU5F1",
    "ENSG00000203909_DPPA5",
    "ENSG00000125798_FOXA2",
    "ENSG00000134853_PDGFRA",
    "ENSG00000107485_GATA3",
    "ENSG00000118777_ABCG2"
)

clusters_selected_violin <- c(
    11,
    18,
    0, 1, 8, 9, 12, 17
)

# blastoid
cells_violin <- purrr::map(clusters_selected_violin, function(x) {
    embedding %>%
        filter(
            louvain == x,
            batch != "PRJEB11202"
        ) %>%
        pull(cell)
})
names(cells_violin) <- clusters_selected_violin

p_violin_blastoid <- plot_violin(
    cells = cells_violin,
    features = features_selected,
    matrix_cpm = matrix_cpm_use,
    x_range_breaks = NULL
) +
    scale_fill_manual(
        name = NULL,
        values = color_palette_cluster
    ) +
    scale_color_manual(
        name = NULL,
        values = color_palette_cluster
    ) +
    labs(title = "Blastoid; 10x Genomics") +
    theme(
        strip.text = ggplot2::element_text(
            family = "Arial",
            size = 6
        ),
        plot.title = element_text(
            family = "Arial",
            size = 8,
            margin = ggplot2::margin(
                t = 0, r = 0, b = 0, l = 0,
                unit = "mm"
            )
        )
    )

# in vivo
cells_violin <- purrr::map(clusters_selected_violin, function(x) {
    embedding %>%
        filter(
            louvain == x,
            batch == "PRJEB11202"
        ) %>%
        pull(cell)
})
names(cells_violin) <- clusters_selected_violin

# re-aligned
p_violin_PRJEB11202 <- plot_violin(
    cells = cells_violin,
    features = features_selected,
    matrix_cpm = matrix_cpm_use,
    x_range_breaks = NULL,
    y_title = NULL
) +
    scale_fill_manual(
        name = NULL,
        values = color_palette_cluster
    ) +
    scale_color_manual(
        name = NULL,
        values = color_palette_cluster
    ) +
    labs(title = "Human pre-implantation embryos; Smart-Seq2") +
    theme(
        strip.text = ggplot2::element_text(
            family = "Arial",
            size = 6
        ),
        plot.title = element_text(
            family = "Arial",
            size = 8,
            margin = ggplot2::margin(
                t = 0, r = 0, b = 0, l = 0,
                unit = "mm"
            )
        )
    )
p_violin_PRJEB11202 <- p_violin_PRJEB11202 +
    theme(
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()
    )

p_violin_blastoid_dims <- patchwork::get_dim(p_violin_blastoid)
p_violin_PRJEB11202_aligned <- patchwork::set_dim(
    p_violin_PRJEB11202,
    p_violin_blastoid_dims
)
gridExtra::grid.arrange(
    p_violin_blastoid,
    p_violin_PRJEB11202_aligned,
    ncol = 2,
    clip = FALSE
)


Cluster composition

clusters_selected <- c(
    11,
    18,
    0, 1, 8, 9, 12, 17,
    10, 14
)

p_cell_distribution_lineage <- prepare_cluster_composition(
    embedding = embedding,
    x = louvain,
    group = lineage
) %>%
    filter(louvain %in% clusters_selected) %>%
    mutate(
        louvain = factor(louvain, levels = rev(clusters_selected)),
        lineage = factor(lineage,
            levels = rev(c("EPI", "HYP", "TE", "Pre-lineage", "Blastoid"))
        )
    ) %>%
    plot_barplot_cluster_composition(
        x = percentage,
        y = louvain,
        z = lineage
    ) +
    scale_fill_manual(
        name = NULL,
        values = c("grey85", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
        breaks = c("Blastoid", "EPI", "HYP", "TE", "Pre-lineage"),
        labels = c(
            "Blastoid cells", "Epiblast (EPI)", "Hypoblast (HYP)",
            "Trophectoderm (TE)", "Pre-lineage"
        )
    )
cell_distribution_labels_left <- tibble::tribble(
    ~louvain, ~lineage, ~label_position, ~percentage,
    11L, "EPI", 0.068306011, 0.136612022,
    18L, "HYP", 0.20930232, 0.37209302,
    0L, "TE", 0.036855838, 0.063274625,
    1L, "TE", 0.20350404, 0.33513028,
    8L, "TE", 0.095531587, 0.181818182,
    9L, "TE", 0.19649123, 0.2877193,
    12L, "TE", 0.073972603, 0.126027397,
    17L, "TE", 0.09583335, 0.1916667,
    10L, "Pre-lineage", 0.284146342, 0.412195122,
    14L, "Pre-lineage", 0.24698795, 0.4939759
) %>% mutate(
    louvain = factor(louvain, levels = rev(clusters_selected))
)

cell_distribution_labels_right <- prepare_cluster_composition(
    embedding = embedding,
    x = louvain,
    group = lineage
) %>%
    filter(
        lineage == "Blastoid",
        louvain %in% clusters_selected
    ) %>%
    mutate(
        louvain = factor(
            louvain,
            levels = rev(louvain)
        )
    )

p_cell_distribution_lineage +
    geom_text(
        aes(
            x = label_position,
            y = louvain,
            label = scales::percent(percentage, accuracy = 1L),
            group = NULL
        ),
        size = 1.8,
        family = "Arial",
        color = "black",
        data = cell_distribution_labels_left,
        hjust = 0.5
    ) +
    geom_text(
        aes(
            x = 0.825,
            y = louvain,
            label = scales::percent(percentage, accuracy = 1L),
            group = NULL
        ),
        size = 1.8,
        family = "Arial",
        color = "black",
        data = cell_distribution_labels_right,
        hjust = 0
    )


Heatmap construction

# prepare features; cells; heatmap matrix;
features_heatmap <- c(
    "ENSG00000204531_POU5F1",
    "ENSG00000184344_GDF3",
    "ENSG00000203909_DPPA5",
    "ENSG00000181449_SOX2",
    "ENSG00000111704_NANOG",
    "ENSG00000241186_TDGF1",
    "ENSG00000156574_NODAL",
    "ENSG00000254339_AC064802.1",
    "ENSG00000283567_C19orf85",
    "ENSG00000151650_VENTX",
    "ENSG00000163032_VSNL1",
    "ENSG00000159231_CBR3",
    "ENSG00000145423_SFRP2",
    "ENSG00000006468_ETV1",
    "ENSG00000147596_PRDM14",
    #
    "ENSG00000136574_GATA4",
    "ENSG00000164266_SPINK1",
    "ENSG00000158966_CACHD1",
    "ENSG00000164093_PITX2",
    "ENSG00000087303_NID2",
    "ENSG00000174358_SLC6A19",
    "ENSG00000134962_KLB",
    "ENSG00000167780_SOAT2",
    "ENSG00000110245_APOC3",
    "ENSG00000170558_CDH2",
    "ENSG00000125848_FLRT3",
    "ENSG00000129538_RNASE1",
    "ENSG00000100079_LGALS2",
    "ENSG00000134853_PDGFRA",
    "ENSG00000164736_SOX17",
    "ENSG00000017427_IGF1",
    "ENSG00000275410_HNF1B",
    "ENSG00000118137_APOA1",
    "ENSG00000198336_MYL4",
    "ENSG00000171557_FGG",
    "ENSG00000125798_FOXA2",
    "ENSG00000146374_RSPO3",
    "ENSG00000115414_FN1",
    "ENSG00000164292_RHOBTB3",
    "ENSG00000141448_GATA6",
    "ENSG00000187498_COL4A1",
    #
    "ENSG00000196549_MME",
    "ENSG00000138814_PPP3CA",
    "ENSG00000082074_FYB1",
    "ENSG00000123191_ATP7B",
    "ENSG00000175318_GRAMD2A",
    "ENSG00000100593_ISM2",
    "ENSG00000265107_GJA5",
    "ENSG00000226887_ERVMER34-1",
    "ENSG00000183734_ASCL2",
    "ENSG00000143850_PLEKHA6",
    "ENSG00000103534_TMC5",
    "ENSG00000132470_ITGB4",
    "ENSG00000180999_C1orf105",
    "ENSG00000109610_SOD3",
    "ENSG00000143369_ECM1",
    "ENSG00000204632_HLA-G",
    "ENSG00000113594_LIFR",
    "ENSG00000168394_TAP1",
    "ENSG00000183287_CCBE1",
    "ENSG00000079393_DUSP13",
    "ENSG00000121769_FABP3",
    "ENSG00000182985_CADM1",
    "ENSG00000164692_COL1A2",
    "ENSG00000112655_PTK7",
    "ENSG00000134873_CLDN10",
    "ENSG00000108375_RNF43",
    "ENSG00000254726_MEX3A",
    "ENSG00000164120_HPGD",
    "ENSG00000081026_MAGI3",
    "ENSG00000120738_EGR1",
    "ENSG00000108960_MMD",
    "ENSG00000112137_PHACTR1",
    "ENSG00000166450_PRTG",
    "ENSG00000164099_PRSS12",
    "ENSG00000143320_CRABP2",
    "ENSG00000145681_HAPLN1",
    "ENSG00000113196_HAND1",
    "ENSG00000198300_PEG3",
    "ENSG00000101144_BMP7",
    "ENSG00000174498_IGDCC3",
    "ENSG00000132005_RFX1",
    "ENSG00000099814_CEP170B",
    "ENSG00000101986_ABCD1",
    "ENSG00000144648_ACKR2",
    "ENSG00000074410_CA12",
    "ENSG00000125740_FOSB",
    "ENSG00000183979_NPB",
    "ENSG00000214049_UCA1",
    "ENSG00000153071_DAB2",
    "ENSG00000126353_CCR7",
    "ENSG00000105880_DLX5",
    "ENSG00000124749_COL21A1",
    "ENSG00000112041_TULP1",
    "ENSG00000120211_INSL4",
    "ENSG00000223850_MYCNUT",
    "ENSG00000164707_SLC13A4",
    "ENSG00000173157_ADAMTS20",
    "ENSG00000196482_ESRRG",
    "ENSG00000180875_GREM2",
    "ENSG00000170255_MRGPRX1",
    "ENSG00000267943_AC010328.1",
    "ENSG00000172818_OVOL1",
    "ENSG00000137270_GCM1",
    "ENSG00000137869_CYP19A1",
    "ENSG00000135678_CPM",
    "ENSG00000171476_HOPX",
    "ENSG00000249861_LGALS16",
    "ENSG00000108244_KRT23",
    "ENSG00000169550_MUC15",
    "ENSG00000260034_LCMT1-AS2",
    "ENSG00000269526_ERVV-1",
    "ENSG00000028137_TNFRSF1B",
    "ENSG00000124731_TREM1",
    "ENSG00000072422_RHOBTB1",
    "ENSG00000185215_TNFAIP2",
    "ENSG00000164877_MICALL2",
    "ENSG00000244476_ERVFRD-1",
    "ENSG00000280109_PLAC4",
    "ENSG00000111057_KRT18",
    "ENSG00000170421_KRT8",
    "ENSG00000169583_CLIC3",
    "ENSG00000107485_GATA3",
    "ENSG00000179348_GATA2",
    "ENSG00000187186_AL162231.1",
    "ENSG00000176155_CCDC57",
    "ENSG00000133243_BTBD2",
    "ENSG00000179364_PACS2",
    "ENSG00000237651_C2orf74",
    "ENSG00000125726_CD70",
    "ENSG00000156587_UBE2L6",
    "ENSG00000182165_TP53TG1",
    "ENSG00000163017_ACTG2",
    "ENSG00000143632_ACTA1",
    "ENSG00000119632_IFI27L2",
    "ENSG00000185847_LINC01405",
    "ENSG00000116661_FBXO2",
    "ENSG00000196878_LAMB3",
    "ENSG00000122861_PLAU",
    "ENSG00000142227_EMP3",
    "ENSG00000011422_PLAUR",
    "ENSG00000100985_MMP9",
    "ENSG00000026508_CD44",
    "ENSG00000104368_PLAT",
    "ENSG00000164171_ITGA2",
    "ENSG00000101198_NKAIN4",
    "ENSG00000116774_OLFML3",
    "ENSG00000125398_SOX9",
    "ENSG00000124225_PMEPA1",
    "ENSG00000118785_SPP1",
    "ENSG00000163453_IGFBP7",
    "ENSG00000182871_COL18A1",
    "ENSG00000110925_CSRNP2",
    "ENSG00000165886_UBTD1",
    "ENSG00000115073_ACTR1B",
    "ENSG00000256288_AC022075.3",
    "ENSG00000228106_AL392172.1",
    "ENSG00000163577_EIF5A2"
)


# heatmap matrix
clusters_selected_heatmap <- c(
    11,
    18,
    0, 1, 8, 9, 12, 17
)

matrix_heatmap <- matrix_cpm_use[
    features_heatmap,
    embedding %>%
        filter(
            louvain %in% clusters_selected_heatmap,
            batch != "PRJEB11202"
        ) %>%
        pull(cell)
]

matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]
matrix_heatmap %>% dim()
## [1]  158 3784
matrix_heatmap <- log10(matrix_heatmap + 1)
matrix_heatmap <- t(scale(t(matrix_heatmap)))

heatmap_limits <- quantile(matrix_heatmap, c(0.05, 0.95))
matrix_heatmap[matrix_heatmap < heatmap_limits[1]] <- heatmap_limits[1]
matrix_heatmap[matrix_heatmap > heatmap_limits[2]] <- heatmap_limits[2]
# sample cells
cells_heatmap_sampled <- purrr::map(clusters_selected_heatmap, function(x) {
    cells_in_group <- embedding %>%
        filter(
            louvain == x,
            batch != "PRJEB11202"
        ) %>%
        pull(cell)

    cat(length(cells_in_group), "\n")

    if (length(cells_in_group) >= 100) {
        c <- sample(cells_in_group, 100)
    } else {
        c <- cells_in_group
    }

    return(c)
})
## 312 
## 51 
## 1428 
## 682 
## 528 
## 376 
## 310 
## 97
names(cells_heatmap_sampled) <- clusters_selected_heatmap

# cells
clusters_EPI <- 11
clusters_HYP <- 18
clusters_TE <- c(0, 1, 8, 9, 12, 17)

cells_heatmap_EPI <- embedding %>%
    filter(
        batch != "PRJEB11202",
        louvain %in% clusters_EPI,
        cell %in% unlist(cells_heatmap_sampled)
    ) %>%
    pull(cell)

cells_heatmap_HYP <- embedding %>%
    filter(
        batch != "PRJEB11202",
        louvain %in% clusters_HYP,
        cell %in% unlist(cells_heatmap_sampled)
    ) %>%
    pull(cell)

if (FALSE) {
    cells_heatmap_TE <- embedding %>%
        filter(
            batch != "PRJEB11202",
            louvain %in% clusters_TE,
            cell %in% unlist(cells_heatmap_sampled)
        ) %>%
        pull(cell)
}

cells_heatmap_TE <- cells_heatmap_sampled[as.character(clusters_TE)] %>%
    unlist()

purrr::map_int(
    list(cells_heatmap_EPI, cells_heatmap_HYP, cells_heatmap_TE),
    length
)
## [1] 100  51 597
# column annotation
ha_cluster <- embedding[
    match(
        x = cells_heatmap_sampled %>% unlist(),
        table = embedding$cell
    ), "louvain",
    drop = TRUE
]

ha_lineage <- rep("TE", length(ha_cluster))
ha_lineage[ha_cluster == "11"] <- "EPI"
ha_lineage[ha_cluster %in% c("18")] <- "HYP"
# map color
col_fun <- circlize::colorRamp2(
    quantile(
        c(
            min(matrix_heatmap),
            max(matrix_heatmap)
        ),
        seq(0, 1, 0.1)
    ),
    viridis::plasma(11)
)
# features to mark
features_to_mark_left <- c(
    "ENSG00000184344_GDF3",
    "ENSG00000241186_TDGF1",
    "ENSG00000156574_NODAL",
    "ENSG00000066468_FGFR2",
    "ENSG00000111704_NANOG",
    "ENSG00000204531_POU5F1",
    "ENSG00000203909_DPPA5",
    "ENSG00000181449_SOX2",
    "ENSG00000186103_ARGFX",
    "ENSG00000147596_PRDM14",
    "ENSG00000171872_KLF17",
    "ENSG00000086548_CEACAM6",
    "ENSG00000166073_GPR176",
    "ENSG00000125740_FOSB",
    "ENSG00000105880_DLX5",
    "ENSG00000169583_CLIC3",
    "ENSG00000244476_ERVFRD-1",
    "ENSG00000072422_RHOBTB1",
    "ENSG00000137869_CYP19A1",
    "ENSG00000169550_MUC15",
    "ENSG00000108244_KRT23",
    "ENSG00000260034_LCMT1-AS2"
)

features_to_mark_right <- c(
    "ENSG00000187498_COL4A1",
    "ENSG00000136574_GATA4",
    "ENSG00000115414_FN1",
    "ENSG00000125798_FOXA2",
    "ENSG00000164736_SOX17",
    "ENSG00000141448_GATA6",
    "ENSG00000275410_HNF1B",
    "ENSG00000153707_PTPRD",
    "ENSG00000101441_CST4",
    "ENSG00000146374_RSPO3",
    "ENSG00000111057_KRT18",
    "ENSG00000170421_KRT8",
    "ENSG00000153071_DAB2",
    "ENSG00000107485_GATA3",
    "ENSG00000179348_GATA2",
    "ENSG00000144648_ACKR2",
    "ENSG00000126353_CCR7",
    "ENSG00000137270_GCM1",
    "ENSG00000180875_GREM2",
    "ENSG00000172818_OVOL1",
    "ENSG00000265107_GJA5",
    "ENSG00000132470_ITGB4",
    "ENSG00000074410_CA12",
    "ENSG00000124749_COL21A1"
)

#
features_to_mark_left <- features_to_mark_left[
    features_to_mark_left %in% rownames(matrix_heatmap)
]

ha_left <- ComplexHeatmap::rowAnnotation(
    foo = ComplexHeatmap::anno_mark(
        at = which(rownames(matrix_heatmap) %in% features_to_mark_left),
        labels = features_to_mark_left %>% str_remove(pattern = "E.+_"),
        which = "row",
        side = "left",
        lines_gp = grid::gpar(col = "grey50"),
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        )
    )
)

#
features_to_mark_right <- features_to_mark_right[
    features_to_mark_right %in% rownames(matrix_heatmap)
]

ha_right <- ComplexHeatmap::rowAnnotation(
    foo = ComplexHeatmap::anno_mark(
        at = which(rownames(matrix_heatmap) %in% features_to_mark_right),
        labels = features_to_mark_right %>% str_remove(pattern = "E.+_"),
        which = "row",
        side = "right",
        lines_gp = grid::gpar(col = "grey50"),
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        )
    )
)
use_raster <- FALSE
# EPI
anno_labels_tbl_EPI <- table(
    ha_cluster[ha_cluster %in% clusters_EPI]
)[as.character(clusters_EPI)] %>%
    as_tibble() %>%
    mutate(
        cum_sum = cumsum(value),
        position = cum_sum - value / 2
    )

anno_labels_cluster_EPI <- rep(NA, length(ha_cluster[ha_cluster %in% clusters_EPI]))

if (nchar(as.character(clusters_EPI)) > 1) {
    cluster_label <- strsplit(as.character(clusters_EPI), "")[[1]]
    
    anno_labels_cluster_EPI[anno_labels_tbl_EPI$position - 5] <- cluster_label[1]
    anno_labels_cluster_EPI[anno_labels_tbl_EPI$position + 5] <- cluster_label[2]
} else {
    anno_labels_cluster_EPI[anno_labels_tbl_EPI$position] <- as.character(clusters_EPI)
}

# anno_labels_cluster_EPI %>% table(exclude = "")

ha_column_EPI <- ComplexHeatmap::HeatmapAnnotation(
    #
    lineage = ComplexHeatmap::anno_simple(
        ha_lineage[ha_lineage == "EPI"],
        # pch = anno_labels_cluster,
        col = setNames(
            object = c("grey85", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
            nm = c("Blastoid", "EPI", "HYP", "TE", "n/a")
        ),
        which = "column",
        pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(2, "mm")
    ),
    #
    cluster = ComplexHeatmap::anno_simple(
        ha_cluster[ha_cluster %in% clusters_EPI],
        pch = anno_labels_cluster_EPI,
        col = color_palette_cluster[as.character(clusters_selected_heatmap)],
        which = "column",
        # pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(2, "mm")
    ),
    #
    show_annotation_name = TRUE,
    annotation_label = c(
        "Lineage",
        "Cluster"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    annotation_name_side = "left"
)

ht_EPI <- ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap[, cells_heatmap_EPI] %>% as.matrix(),
    rect_gp = grid::gpar(col = NA, lwd = 0),
    # col = viridis::plasma(n = 10),
    # col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    col = col_fun,
    #
    # row_title = character(0),
    # row_title_side = c("left", "right"),
    row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    row_title_rot = 0,
    # column_title = character(0),
    # column_title_side = c("top", "bottom"),
    column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_title_rot = 0,
    #
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    cluster_columns = FALSE,
    show_column_dend = FALSE,
    #
    show_row_names = FALSE,
    # row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
    # row_names_side = c("left"),
    # row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    # row_names_rot = 0,
    # row_names_centered = FALSE,
    show_column_names = FALSE,
    # column_labels = colnames(matrix),
    # column_names_side = c("top"),
    # column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    # column_names_rot = 90,
    # column_names_centered = FALSE,
    #
    top_annotation = ha_column_EPI,
    bottom_annotation = NULL,
    left_annotation = ha_left,
    right_annotation = NULL,
    #
    # row_split = table_s2_sheet4$ClusterID,
    # row_gap = unit(0.3, "mm"),
    column_split = factor(
        ha_lineage[ha_lineage == "EPI"],
        levels = unique(ha_lineage[ha_lineage == "EPI"])
    ),
    column_gap = unit(0, "mm"),
    #
    show_heatmap_legend = FALSE,
    heatmap_legend_param = list(
        # title = expression(paste("Log"[10], " (CPM + 1)")),
        title = "Expr",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = unit(15, "mm"),
        legend_width = unit(5, "mm")
    ),
    #
    use_raster = use_raster,
    #
    width = unit(8, "mm")
)
# ComplexHeatmap::draw(ht_EPI)
# HYP
anno_labels_tbl_HYP <- table(
    ha_cluster[ha_cluster %in% clusters_HYP]
)[as.character(clusters_HYP)] %>%
    as_tibble() %>%
    mutate(
        cum_sum = cumsum(value),
        position = cum_sum - value / 2
    )

anno_labels_cluster_HYP <- rep(NA, length(ha_cluster[ha_cluster %in% clusters_HYP]))

# anno_labels_cluster_HYP[anno_labels_tbl_HYP$position] <- as.character(clusters_HYP)
# anno_labels_cluster_HYP %>% table(exclude = "")

if (nchar(as.character(clusters_HYP)) > 1) {
    cluster_label <- strsplit(as.character(clusters_HYP), "")[[1]]
    
    anno_labels_cluster_HYP[anno_labels_tbl_HYP$position - 5] <- cluster_label[1]
    anno_labels_cluster_HYP[anno_labels_tbl_HYP$position + 5] <- cluster_label[2]
} else {
    anno_labels_cluster_HYP[anno_labels_tbl_HYP$position] <- as.character(clusters_HYP)
}

ha_column_HYP <- ComplexHeatmap::HeatmapAnnotation(
    #
    lineage = ComplexHeatmap::anno_simple(
        ha_lineage[ha_lineage == "HYP"],
        # pch = anno_labels_cluster,
        col = setNames(
            object = c("grey85", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
            nm = c("Blastoid", "EPI", "HYP", "TE", "n/a")
        ),
        which = "column",
        pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(2, "mm")
    ),
    #
    cluster = ComplexHeatmap::anno_simple(
        ha_cluster[ha_cluster %in% clusters_HYP],
        pch = anno_labels_cluster_HYP,
        col = color_palette_cluster[as.character(clusters_selected_heatmap)],
        which = "column",
        # pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(2, "mm")
    ),
    #
    show_annotation_name = FALSE,
    annotation_label = c(
        "Lineage",
        "Cluster"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    annotation_name_side = "left"
)

ht_HYP <- ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap[, cells_heatmap_HYP] %>% as.matrix(),
    rect_gp = grid::gpar(col = NA, lwd = 0),
    # col = viridis::plasma(n = 10),
    # col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    col = col_fun,
    #
    # row_title = character(0),
    # row_title_side = c("left", "right"),
    row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    row_title_rot = 0,
    # column_title = character(0),
    # column_title_side = c("top", "bottom"),
    column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_title_rot = 0,
    #
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    cluster_columns = FALSE,
    show_column_dend = FALSE,
    #
    show_row_names = FALSE,
    # row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
    # row_names_side = c("left"),
    # row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    # row_names_rot = 0,
    # row_names_centered = FALSE,
    show_column_names = FALSE,
    # column_labels = colnames(matrix),
    # column_names_side = c("top"),
    # column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    # column_names_rot = 90,
    # column_names_centered = FALSE,
    #
    top_annotation = ha_column_HYP,
    bottom_annotation = NULL,
    left_annotation = NULL,
    right_annotation = NULL,
    #
    # row_split = table_s2_sheet4$ClusterID,
    # row_gap = unit(0.3, "mm"),
    column_split = factor(
        ha_lineage[ha_lineage == "HYP"],
        levels = unique(ha_lineage[ha_lineage == "HYP"])
    ),
    column_gap = unit(0, "mm"),
    #
    show_heatmap_legend = FALSE,
    heatmap_legend_param = list(
        # title = expression(paste("Log"[10], " (CPM + 1)")),
        title = "Expr",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = unit(15, "mm"),
        legend_width = unit(5, "mm")
    ),
    #
    use_raster = use_raster,
    #
    width = unit(5, "mm")
)
# ComplexHeatmap::draw(ht_HYP)
# TE
anno_labels_tbl_TE <- table(
    ha_cluster[ha_cluster %in% clusters_TE]
)[as.character(clusters_TE)] %>%
    enframe() %>%
    mutate(
        cum_sum = cumsum(value),
        position = cum_sum - value / 2
    )

anno_labels_tbl_TE <- purrr::map(anno_labels_tbl_TE$name, function(x) {
    # works but ugly
    a <- anno_labels_tbl_TE %>%
        filter(name == x)

    if (nchar(a$name) > 1) {
        cluster_label <- strsplit(as.character(a$name), "")[[1]]

        a <- rbind(a, a)

        a[1, 1] <- cluster_label[1]
        a[2, 1] <- cluster_label[2]
        a %<>% as.data.frame()
        a[1, 4] <- a[1, 4] - 5
        a[2, 4] <- a[2, 4] + 5
    }
    return(a)
}) %>%
    purrr::reduce(rbind)

anno_labels_cluster_TE <- rep(NA, length(ha_cluster[ha_cluster %in% clusters_TE]))
anno_labels_cluster_TE[anno_labels_tbl_TE$position] <- anno_labels_tbl_TE$name
# anno_labels_cluster_TE %>% table(exclude = "")

ha_column_TE <- ComplexHeatmap::HeatmapAnnotation(
    #
    lineage = ComplexHeatmap::anno_simple(
        ha_lineage[ha_lineage == "TE"],
        # pch = anno_labels_cluster,
        col = setNames(
            object = c("grey85", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
            nm = c("Blastoid", "EPI", "HYP", "TE", "n/a")
        ),
        which = "column",
        pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(2, "mm")
    ),
    #
    cluster = ComplexHeatmap::anno_simple(
        ha_cluster[ha_cluster %in% clusters_TE],
        pch = anno_labels_cluster_TE,
        col = color_palette_cluster[as.character(clusters_selected_heatmap)],
        which = "column",
        # pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(2, "mm")
    ),
    #
    show_annotation_name = FALSE,
    annotation_label = c(
        "Lineage",
        "Cluster"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    annotation_name_side = "left"
)

ht_TE <- ComplexHeatmap::Heatmap(
    # matrix = matrix_heatmap[, colnames(matrix_heatmap) %in% cells_heatmap_TE] %>% as.matrix(),
    matrix = matrix_heatmap[, cells_heatmap_TE] %>% as.matrix(),
    rect_gp = grid::gpar(col = NA, lwd = 0),
    # col = viridis::plasma(n = 10),
    # col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    col = col_fun,
    #
    # row_title = character(0),
    # row_title_side = c("left", "right"),
    row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    row_title_rot = 0,
    # column_title = character(0),
    # column_title_side = c("top", "bottom"),
    column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_title_rot = 0,
    #
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    cluster_columns = FALSE,
    show_column_dend = FALSE,
    #
    show_row_names = FALSE,
    # row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
    # row_names_side = c("left"),
    # row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    # row_names_rot = 0,
    # row_names_centered = FALSE,
    show_column_names = FALSE,
    # column_labels = colnames(matrix),
    # column_names_side = c("top"),
    # column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    # column_names_rot = 90,
    # column_names_centered = FALSE,
    #
    top_annotation = ha_column_TE,
    bottom_annotation = NULL,
    left_annotation = NULL,
    right_annotation = ha_right,
    #
    # row_split = table_s2_sheet4$ClusterID,
    # row_gap = unit(0.3, "mm"),
    column_split = factor(
        ha_lineage[ha_lineage == "TE"],
        levels = unique(ha_lineage[ha_lineage == "TE"])
    ),
    column_gap = unit(0, "mm"),
    #
    show_heatmap_legend = FALSE,
    heatmap_legend_param = list(
        # title = expression(paste("Log"[10], " (CPM + 1)")),
        title = "Expr",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = unit(15, "mm"),
        legend_width = unit(5, "mm")
    ),
    #
    use_raster = use_raster,
    #
    width = unit(50, "mm")
)
# legend
lgd_colorbar <- ComplexHeatmap::Legend(
    col_fun = col_fun,
    title = "Expr",
    grid_height = unit(1, "mm"),
    grid_width = unit(2, "mm"),
    legend_height = unit(10, "mm"),
    legend_width = unit(2, "mm"),
    title_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    ),
    labels_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 5
    )
)

pd <- ComplexHeatmap::packLegend(
    # lgd_cluster,
    # lgd_lineage,
    lgd_colorbar,
    # gap = unit(8, "mm"),
    direction = "vertical"
)
ComplexHeatmap::draw(
    ht_EPI + ht_HYP + ht_TE,
    heatmap_legend_list = list(pd),
    ht_gap = unit(0, "mm")
)


Gene Ontology enrichment

enriched_go <- tibble::tribble(
    ~category, ~rank,       ~go_id,                           ~term, ~p_value, ~p_value_log,
        "EPI",  1L, "GO:0019827", "stem cell population maintenance",    9e-07,  6.045757491,
        "EPI",  2L, "GO:0048368",     "lateral mesoderm development",  0.00053,   3.27572413,
        "EPI",  3L, "GO:0009790",               "embryo development",  0.00088,  3.055517328,
        "HYP",  1L, "GO:0048598",          "embryonic morphogenesis",  3.9e-07,  6.408935393,
        "HYP",  2L, "GO:0007369",                     "gastrulation",  1.8e-05,  4.744727495,
        "HYP",  3L, "GO:0007492",             "endoderm development",  0.00284,   2.54668166,
         "TE",  1L, "GO:0001890",             "placenta development",    6e-11,  10.22184875,
         "TE",  2L, "GO:0001892",   "embryonic placenta development",    2e-08,  7.698970004,
         "TE",  3L, "GO:0048513",         "animal organ development",  2.7e-07,  6.568636236
    )

enriched_go %>%
    mutate(
        category = factor(
            category,
            levels = c("EPI", "HYP", "TE") #  %>% rev()
        ),
        rank = factor(
            rank,
            levels = c(3, 2, 1)
        )
    ) %>%
    plot_barplot_go_enrichment(
        x = p_value_log,
        y = rank,
        z = category
    ) +
    facet_wrap(
        ~category,
        nrow = 1,
        strip.position = "left",
        scales = "free_x",
        labeller = labeller(
            category = c("EPI" = "EPI", "HYP" = "HYP", "TE" = "TE")
        )
    ) +
    geom_text(
        aes(
            x = 0,
            label = term,
            group = NULL
        ),
        size = 2.2,
        family = "Arial",
        color = "black",
        data = enriched_go %>%
            mutate(
                category = factor(
                    category,
                    levels = c("EPI", "HYP", "TE")
                ),
                rank = factor(
                    rank,
                    levels = c(3, 2, 1)
                ),
                term = paste(" ", term)
            ),
        hjust = 0
    ) +
    scale_fill_manual(
        values = c("#8ace7e", "#ff684c", "#9467bd")
    )


High concordance of blastoid replicates

Data loading

EMBEDDING_FILE <- "embedding_replicate.csv.gz"

embedding_replicate <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "github",
        "data",
        EMBEDDING_FILE
    )
) %>% 
    dplyr::select(cell:y_fitsne)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   cell = col_character(),
##   batch = col_character(),
##   louvain = col_double(),
##   x_tsne = col_double(),
##   y_tsne = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_fitsne = col_double(),
##   y_fitsne = col_double(),
##   x_phate = col_double(),
##   y_phate = col_double(),
##   `x_min_dist=0.1` = col_double(),
##   `y_min_dist=0.1` = col_double(),
##   x_multicoretsne = col_double(),
##   y_multicoretsne = col_double()
## )
rm(EMBEDDING_FILE)

Embedding visualization

EMBEDDING_TITLE_PREFIX <- "FIt-SNE"

x_column <- "x_fitsne"
y_column <- "y_fitsne"

Clustering & batch & UMI

p_embedding_replicate_cluster <- plot_embedding(
    embedding = embedding_replicate[, c(x_column, y_column)],
    color_values = embedding_replicate$louvain %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Cluster"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 0.6,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_replicate$louvain)))
    )
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p_embedding_replicate_batch <- plot_embedding(
    embedding = embedding_replicate[, c(x_column, y_column)],
    color_values = embedding_replicate$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.3,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_replicate$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
CB_POSITION <- c(0.875, 0.3)
p_embedding_replicate_umi <- plot_embedding_value(
    embedding = embedding_replicate[, c(x_column, y_column)],
    color_values = log10(Matrix::colSums(matrix_readcount_use[, embedding_replicate$cell]) + 1),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.6,
    sort_values = TRUE,
    FUN = function(x) x
)
purrr::reduce(
    list(
        p_embedding_replicate_cluster,
        p_embedding_replicate_batch,
        p_embedding_replicate_umi
    ), `+`
) +
    patchwork::plot_layout(ncol = 3) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Cluster composition

prepare_cluster_composition(
    embedding = embedding_replicate,
    x = louvain,
    group = batch
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = batch
    )


Blastoids derived stem cell lines

Data loading

EMBEDDING_FILE <- "embedding_stem.csv.gz"

embedding_stem <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "github",
        "data",
        EMBEDDING_FILE
    )
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   cell = col_character(),
##   batch = col_character(),
##   louvain = col_double(),
##   x_tsne = col_double(),
##   y_tsne = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_fitsne = col_double(),
##   y_fitsne = col_double(),
##   x_phate = col_double(),
##   y_phate = col_double(),
##   `x_min_dist=0.1` = col_double(),
##   `y_min_dist=0.1` = col_double(),
##   x_multicoretsne = col_double(),
##   y_multicoretsne = col_double()
## )
rm(EMBEDDING_FILE)


Embedding visualization

EMBEDDING_TITLE_PREFIX <- "FIt-SNE"

x_column <- "x_fitsne"
y_column <- "y_fitsne"

Clustering & batch & UMI

p_embedding_stem_batch <- plot_embedding(
    embedding = embedding_stem[, c(x_column, y_column)],
    color_values = embedding_stem$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.6,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_stem$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

CB_POSITION <- c(0.875, 0.3)
p_embedding_stem_umi <- plot_embedding_value(
    embedding = embedding_stem[, c(x_column, y_column)],
    color_values = log10(Matrix::colSums(matrix_readcount_use[, embedding_stem$cell]) + 1),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.6,
    sort_values = TRUE,
    FUN = function(x) x
)

selected_feature <- "ENSG00000204531_POU5F1"
p_embedding_stem_POU5F1 <- plot_embedding_value(
    embedding = embedding_stem[, c(x_column, y_column)],
    color_values = log10(matrix_cpm_use[selected_feature, embedding_stem$cell] + 1),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; ", selected_feature),
    label_position = NULL,
    geom_point_size = 0.6,
    sort_values = TRUE,
    FUN = function(x) x
)
purrr::reduce(list(
    p_embedding_stem_batch,
    p_embedding_stem_umi,
    p_embedding_stem_POU5F1
), `+`) +
    patchwork::plot_layout(ncol = 3) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Polishing

p_embedding_stem_batch <- plot_embedding(
    embedding = embedding_stem[, c(x_column, y_column)],
    color_values = embedding_stem$batch %>% as.factor(),
    # label = "t-SNE; Batch",
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.5,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_stem$batch))),
        labels = c(
            "Naïve ESCs; 5iLA",
            "Blastoid derived nESCs; 5iLA",
            "Blastoid derived nEND; NACL",
            "Blastoid derived hTSCs; hTSM"
        )
    ) +
    guides(colour = guide_legend(override.aes = list(size = 2), ncol = 1)) +
    labs(color = NULL) +
    customized_theme(legend_key_size = 1, legend_text_size = 5) +
    guides(
        colour = guide_legend(
            ncol = 1,
            override.aes = list(size = 2)
        ),
        byrow = TRUE
    )
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
x_label <- ggplot_build(p_embedding_stem_batch)$layout$panel_params[[1]][c("x.range")] %>%
    unlist() %>%
    quantile(0.1)
y_label <- ggplot_build(p_embedding_stem_batch)$layout$panel_params[[1]][c("y.range")] %>%
    unlist() %>%
    quantile(0.8)

features_selected <- c(
    "ENSG00000181449_SOX2",
    "ENSG00000141448_GATA6",
    "ENSG00000107485_GATA3"
)

p_embedding_stem_SOX2_GATA6_GATA3 <- purrr::map(
    features_selected, function(x) {
        CB_POSITION <- c(0.875, 0.35)

        values <- log10(matrix_cpm_use[x, embedding_stem$cell] + 1)

        plot_embedding_value(
            embedding = embedding_stem[, c(x_column, y_column)],
            color_values = values,
            colorbar_position = CB_POSITION,
            label = str_c(x %>% str_remove("E.+_"), sep = "\n"),
            label_position = c(x_label, y_label),
            geom_point_size = 0.5,
            sort_values = TRUE,
            FUN = function(x) x,
            label_size = 1.8
        )
    }
)
# compose
list(
    p_embedding_stem_batch,
    p_embedding_stem_SOX2_GATA6_GATA3
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(nrow = 2) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )


Trajectory inference

Data loading

EMBEDDING_FILE <- "embedding_timecourse.csv.gz"

embedding_timecourse <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "github",
        "data",
        EMBEDDING_FILE
    )
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   cell = col_character(),
##   batch = col_character(),
##   louvain = col_double(),
##   x_tsne = col_double(),
##   y_tsne = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_fitsne = col_double(),
##   y_fitsne = col_double(),
##   x_phate = col_double(),
##   y_phate = col_double(),
##   `x_min_dist=0.1` = col_double(),
##   `y_min_dist=0.1` = col_double(),
##   x_multicoretsne = col_double(),
##   y_multicoretsne = col_double()
## )
rm(EMBEDDING_FILE)


Embedding visualization

EMBEDDING_TITLE_PREFIX <- "FIt-SNE"

x_column <- "x_fitsne"
y_column <- "y_fitsne"

Clustering & batch & UMI

p_embedding_timecourse_cluster <- plot_embedding(
    embedding = embedding_timecourse[, c(x_column, y_column)],
    color_values = letters[as.integer(embedding_timecourse$louvain + 1)] %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, ";", " Cluster"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 0.6,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_timecourse$louvain)))
    )

p_embedding_timecourse_batch <- plot_embedding(
    embedding = embedding_timecourse[, c(x_column, y_column)],
    color_values = embedding_timecourse$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.45,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_timecourse$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

CB_POSITION <- c(0.8, 0.35)
p_embedding_timecourse_umi <- plot_embedding_value(
    embedding = embedding_timecourse[, c(x_column, y_column)],
    color_values = log10(Matrix::colSums(matrix_readcount_use[, embedding_timecourse$cell]) + 1),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.6,
    sort_values = TRUE,
    FUN = function(x) x
)
purrr::reduce(
    list(
        p_embedding_timecourse_cluster,
        p_embedding_timecourse_batch,
        p_embedding_timecourse_umi
    ), `+`
) +
    patchwork::plot_layout(ncol = 3) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Polishing

p_embedding_timecourse_batch <- plot_embedding(
    embedding = embedding_timecourse[, c(x_column, y_column)],
    color_values = embedding_timecourse$batch %>% as.factor(),
    # label = "t-SNE; Batch",
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = FALSE,
    geom_point_size = 0.4,
    sort_values = FALSE
) +
    scale_color_manual(
        values = yarrr::piratepal(palette = "google") %>% as.character(),
        labels = c("D3", "D6", "D9", "D9; PXGL")
    ) +
    guides(colour = guide_legend(override.aes = list(size = 2), ncol = 1)) +
    labs(color = NULL) +
    customized_theme(legend_key_size = 1, legend_text_size = 5) +
    guides(
        colour = guide_legend(
            ncol = 1,
            override.aes = list(size = 2)
        ),
        byrow = TRUE
    )
embedding_timecourse %<>%
    dplyr::select(cell:y_fitsne) %>% 
    left_join(
        embedding %>%
            select(cell, louvain),
        by = c("cell" = "cell")
    ) %>%
    dplyr::rename(louvain = louvain.x, louvain_ = louvain.y) %>%
    mutate(
        louvain_ = case_when(
            is.na(louvain_) ~ "NA",
            TRUE ~ as.character(louvain_)
        ),
        #
        lineage = case_when(
            louvain_ %in% c(11) ~ "EPI",
            louvain_ %in% c(18) ~ "HYP",
            louvain_ %in% c(0, 1, 8, 9, 12, 17) ~ "TE",
            louvain_ %in% c(10, 14) ~ "Pre-lineage",
            TRUE ~ "Other"
        ),
        lineage = factor(
            lineage,
            levels = c("Other", "TE", "Pre-lineage", "HYP", "EPI")
        ),
        #
        cluters_selected = case_when(
            louvain_ %in% c(2, 3, 4, 5, 6, 7, 13, 15, 16) ~ louvain_,
            TRUE ~ "Other"
        ),
        cluters_selected = factor(
            cluters_selected,
            levels =  c("Other", "2", "3", "4", "5", "6", "7", "13", "15","16")
        )
    )
p_embedding_timecourse_lineage <- embedding_timecourse %>%
    filter(
        lineage == "Other"
    ) %>%
    ggplot(
        aes(
            x = x_fitsne,
            y = y_fitsne
        )
    ) +
    geom_point(size = 0.6, stroke = 0, shape = 16, color = "grey70") +
    geom_point(
        data = embedding_timecourse %>% filter(lineage != "Other"),
        aes(x = x_fitsne, y = y_fitsne, color = lineage),
        size = 0.4, stroke = 0, shape = 16
    ) +
    scale_color_manual(
        name = NULL,
        values = c("grey70", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
        breaks = c("Other", "EPI", "HYP", "TE", "Pre-lineage"),
        labels = c("Other", "ELCs", "HLCs", "TLCs", "Pre-lineage-like")
    ) +
    theme_void() +
    customized_theme(legend_key_size = 1, legend_text_size = 5) +
    guides(
        colour = guide_legend(
            ncol = 1,
            override.aes = list(size = 2)
        ),
        byrow = TRUE
    )
p_embedding_timecourse_cluster_matched <- embedding_timecourse %>%
    filter(
        cluters_selected == "Other"
    ) %>%
    ggplot(aes(x = x_fitsne, y = y_fitsne)) +
    geom_point(size = 0.6, stroke = 0, shape = 16, color = "grey70") +
    geom_point(
        data = embedding_timecourse %>% filter(cluters_selected != "Other"),
        aes(x = x_fitsne, y = y_fitsne, color = cluters_selected),
        size = 0.4, stroke = 0, shape = 16
    ) +
    scale_color_manual(
        name = NULL,
        values = c("Other" = "grey70", color_palette_cluster)
    ) +
    theme_void() +
    customized_theme(legend_key_size = 1, legend_text_size = 5) +
    guides(
        colour = guide_legend(
            ncol = 2,
            override.aes = list(size = 2)
        ),
        byrow = TRUE
    ) +
    ggrepel::geom_text_repel(
        data = get_middle_points(
            embedding_timecourse %>% filter(cluters_selected != "Other"),
            x_fitsne,
            y_fitsne,
            cluters_selected
        ),
        ggplot2::aes(
            x = x_fitsne,
            y = y_fitsne,
            label = cluters_selected
        ),
        color = "black",
        size = 1.8,
        family = "Arial",
        #
        box.padding = 0.4,
        point.padding = 1e-06,
        min.segment.length = 0,
        arrow = ggplot2::arrow(length = unit(0.015, "npc")),
        max.overlaps = Inf,
        nudge_x = 0,
        nudge_y = 10,
        #
        segment.color = "grey35",
        segment.size = 0.25,
        segment.alpha = 1,
        # segment.inflect = TRUE,
        seed = 20201121
    )
## `summarise()` has ungrouped output. You can override using the `.groups` argument.
## Joining, by = "cluters_selected"
## Warning in min(distance): no non-missing arguments to min; returning Inf
## Joining, by = "cluters_selected"
## Joining, by = "cluters_selected"
## Joining, by = "cluters_selected"
## Joining, by = "cluters_selected"
## Joining, by = "cluters_selected"
## Joining, by = "cluters_selected"
## Joining, by = "cluters_selected"
## Joining, by = "cluters_selected"
## Joining, by = "cluters_selected"
## Warning: Ignoring unknown parameters: max.overlaps
# vertical
purrr::reduce(
    list(
        p_embedding_timecourse_batch,
        p_embedding_timecourse_lineage,
        p_embedding_timecourse_cluster_matched
    ), `+`
) +
    patchwork::plot_layout(ncol = 1) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Expression

CB_POSITION <- c(0.8, 0.36)
x_label <- ggplot_build(p_embedding_timecourse_batch)$layout$panel_params[[1]][c("x.range")] %>%
    unlist() %>%
    quantile(0.1)
y_label <- ggplot_build(p_embedding_timecourse_batch)$layout$panel_params[[1]][c("y.range")] %>%
    unlist() %>%
    quantile(0.875)

purrr::map(features_selected, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding_timecourse[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding_timecourse$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = x %>% str_remove("^E.+_"),
        label_position = c(x_label, y_label),
        geom_point_size = 0.5,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(nrow = 1) +
    plot_annotation(theme = theme(plot.margin = margin()))


Cluster composition

prepare_cluster_composition(
    embedding = embedding_timecourse,
    x = louvain,
    group = batch
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = batch
    ) + 
    scale_fill_manual(
        values = yarrr::piratepal(palette = "google") %>% as.character(),
        labels = c("D3", "D6", "D9", "D9; PXGL")
    )



R session info

devtools::session_info()$platform
##  setting  value                       
##  version  R version 4.0.3 (2020-10-10)
##  os       macOS  11.1                 
##  system   x86_64, darwin20.1.0        
##  ui       unknown                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2020-12-16
devtools::session_info()$pack %>%
    as_tibble() %>%
    dplyr::select(
        package,
        loadedversion,
        date,
        `source`
    ) %>%
    # print(n = nrow(.))
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
package loadedversion date source
assertthat 0.2.1 2019-03-21 CRAN (R 4.0.0)
backports 1.2.1 2020-12-09 CRAN (R 4.0.3)
BayesFactor 0.9.12-4.2 2018-05-19 CRAN (R 4.0.3)
broom 0.7.2.9000 2020-12-12 Github (tidymodels/broom@810115d)
callr 3.5.1.9000 2020-11-23 Github (r-lib/callr@d44e660)
cellranger 1.1.0 2016-07-27 CRAN (R 4.0.0)
checkmate 2.0.0 2020-02-06 CRAN (R 4.0.0)
circlize 0.4.11 2020-10-31 CRAN (R 4.0.3)
cli 2.2.0 2020-11-20 CRAN (R 4.0.3)
coda 0.19-4 2020-09-30 CRAN (R 4.0.2)
codetools 0.2-16 2018-12-24 CRAN (R 4.0.3)
colorspace 2.0-0 2020-11-10 R-Forge (R 4.0.3)
crayon 1.3.4 2017-09-16 CRAN (R 4.0.0)
DBI 1.1.0 2019-12-15 CRAN (R 4.0.0)
dbplyr 2.0.0 2020-11-03 CRAN (R 4.0.3)
desc 1.2.0 2018-05-01 CRAN (R 4.0.0)
devtools 2.3.1.9000 2020-12-05 Github (r-lib/devtools@b3be019)
digest 0.6.27 2020-10-24 CRAN (R 4.0.3)
dplyr 1.0.2.9000 2020-12-14 Github (tidyverse/dplyr@59105eb)
ellipsis 0.3.1 2020-05-15 CRAN (R 4.0.3)
evaluate 0.14 2019-05-28 CRAN (R 4.0.0)
extrafont 0.17 2014-12-08 CRAN (R 4.0.2)
extrafontdb 1.0 2012-06-11 CRAN (R 4.0.0)
fansi 0.4.1 2020-01-08 CRAN (R 4.0.0)
farver 2.0.3 2020-01-16 CRAN (R 4.0.0)
forcats 0.5.0.9000 2020-12-09 Github (tidyverse/forcats@6c5e59f)
fs 1.5.0 2020-07-31 CRAN (R 4.0.3)
generics 0.1.0 2020-10-31 CRAN (R 4.0.3)
ggplot2 3.3.2.9000 2020-12-14 Github (tidyverse/ggplot2@9deb97b)
GlobalOptions 0.1.2 2020-06-10 CRAN (R 4.0.0)
glue 1.4.2 2020-08-27 CRAN (R 4.0.2)
gt 0.2.2 2020-11-25 Github (rstudio/gt@bae32f4)
gtable 0.3.0 2019-03-25 CRAN (R 4.0.0)
gtools 3.8.2 2020-03-31 CRAN (R 4.0.0)
haven 2.3.1 2020-06-01 CRAN (R 4.0.0)
hms 0.5.3 2020-01-08 CRAN (R 4.0.0)
htmltools 0.5.0.9003 2020-12-03 Github (rstudio/htmltools@d18bd8e)
httr 1.4.2 2020-07-20 CRAN (R 4.0.2)
jpeg 0.1-8.1 2019-10-24 CRAN (R 4.0.0)
jsonlite 1.7.2 2020-12-09 CRAN (R 4.0.3)
knitr 1.30.4 2020-12-15 Github (yihui/knitr@f8f90ba)
labeling 0.4.2 2020-10-20 CRAN (R 4.0.3)
lattice 0.20-41 2020-04-02 CRAN (R 4.0.3)
lifecycle 0.2.0 2020-03-06 CRAN (R 4.0.0)
lubridate 1.7.9.2 2020-12-12 Github (tidyverse/lubridate@04be2a8)
magrittr 2.0.1.9000 2020-12-14 Github (tidyverse/magrittr@bb1c86a)
Matrix 1.2-18 2019-11-27 CRAN (R 4.0.2)
MatrixModels 0.4-1 2015-08-22 CRAN (R 4.0.0)
memoise 1.1.0 2017-04-21 CRAN (R 4.0.0)
modelr 0.1.8.9000 2020-11-23 Github (tidyverse/modelr@16168e0)
munsell 0.5.0 2018-06-12 CRAN (R 4.0.0)
mvtnorm 1.1-1 2020-06-09 CRAN (R 4.0.2)
patchwork 1.1.0 2020-11-09 CRAN (R 4.0.3)
pbapply 1.4-3 2020-08-18 CRAN (R 4.0.2)
pillar 1.4.7 2020-11-20 CRAN (R 4.0.3)
pkgbuild 1.1.0 2020-07-13 CRAN (R 4.0.3)
pkgconfig 2.0.3 2019-09-22 CRAN (R 4.0.0)
pkgload 1.1.0 2020-05-29 CRAN (R 4.0.0)
prettyunits 1.1.1.9000 2020-11-23 Github (r-lib/prettyunits@b1cdad8)
processx 3.4.5 2020-11-30 CRAN (R 4.0.3)
ps 1.5.0 2020-12-05 CRAN (R 4.0.3)
purrr 0.3.4.9000 2020-11-23 Github (tidyverse/purrr@af06d45)
R6 2.5.0 2020-11-02 Github (r-lib/R6@6cf7d4e)
ragg 1.0.0.9000 2020-12-16 Github (r-lib/ragg@cd1a50f)
Rcpp 1.0.5 2020-07-06 CRAN (R 4.0.3)
readr 1.4.0.9000 2020-11-23 Github (tidyverse/readr@97186a8)
readxl 1.3.1.9000 2020-11-23 Github (tidyverse/readxl@9f85fa5)
remotes 2.2.0.9000 2020-12-03 Github (r-lib/remotes@5d0d9fd)
reprex 0.3.0 2019-05-16 CRAN (R 4.0.0)
reticulate 1.18 2020-10-25 CRAN (R 4.0.3)
rlang 0.4.9.9000 2020-12-16 Github (r-lib/rlang@b5f7d34)
rmarkdown 2.6.0001 2020-12-15 Github (rstudio/rmarkdown@80f14b2)
rprojroot 2.0.2 2020-11-15 CRAN (R 4.0.3)
rstudioapi 0.13 2020-11-12 CRAN (R 4.0.3)
Rttf2pt1 1.3.8 2020-01-10 CRAN (R 4.0.0)
rvest 0.3.6 2020-07-25 CRAN (R 4.0.2)
scales 1.1.1 2020-05-11 CRAN (R 4.0.3)
sessioninfo 1.1.1 2018-11-05 CRAN (R 4.0.3)
shape 1.4.5 2020-09-13 CRAN (R 4.0.2)
stringi 1.5.3 2020-09-09 CRAN (R 4.0.2)
stringr 1.4.0.9000 2020-11-23 Github (tidyverse/stringr@1f03eb0)
styler 1.3.2.9000 2020-11-30 Github (r-lib/styler@d5b8b0e)
systemfonts 0.3.2.9000 2020-12-16 Github (r-lib/systemfonts@7df5e29)
testthat 3.0.0.9000 2020-12-15 Github (r-lib/testthat@6227640)
textshaping 0.2.1.9000 2020-12-16 Github (r-lib/textshaping@e71ce33)
tibble 3.0.4.9000 2020-12-13 Github (tidyverse/tibble@5881b43)
tidyr 1.1.2.9000 2020-12-09 Github (tidyverse/tidyr@581b488)
tidyselect 1.1.0 2020-05-11 CRAN (R 4.0.3)
tidyverse 1.3.0.9000 2020-11-23 Github (hadley/tidyverse@8a0bb99)
usethis 2.0.0.9000 2020-12-13 Github (r-lib/usethis@45d0fef)
vctrs 0.3.5 2020-11-17 CRAN (R 4.0.3)
viridisLite 0.3.0 2018-02-01 CRAN (R 4.0.0)
withr 2.3.0 2020-09-22 CRAN (R 4.0.2)
xfun 0.19 2020-10-30 CRAN (R 4.0.3)
xml2 1.3.2 2020-04-23 CRAN (R 4.0.0)
yaml 2.2.1 2020-02-01 CRAN (R 4.0.0)
yarrr 0.1.5 2017-04-19 CRAN (R 4.0.3)